package data_deepprocessing.algorithm.bootstrapping;

import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import data_deepprocessing.algorithm.bootstrapping.beans.Pattern2BSBean;
import data_deepprocessing.algorithm.bootstrapping.beans.Seed2BSBean;
import data_deepprocessing.algorithm.bootstrapping.beans.SentenceBean;
import data_deepprocessing.algorithm.bootstrapping.service.BootstrappingPatternService;
import data_deepprocessing.algorithm.bootstrapping.service.BootstrappingSeedService;
import data_deepprocessing.algorithm.bootstrapping.service.BootstrappingSentenceService;

/**
 * Runs one round of a bootstrapping loop over a list of sentences: known seed words are used to
 * harvest left/right context patterns, and those patterns are then used to harvest new seed
 * words. All persistence is delegated to the three injected services.
 *
 * <p>Notes carried over from the original author:
 * <ol>
 * <li>This is a re-implementation of bootstrapping; the original program lives in the project
 * CriticalMoment.</li>
 * <li>A seed word may occur several times in one sentence, so every occurrence is processed.</li>
 * <li>Positions are recorded relative to a single sentence, which is why part of the older
 * positioning logic was dropped.</li>
 * </ol>
 *
 * <p>The services are supplied through the setters below and are not null-checked here.
 *
 * @author Yuanyuhu
 */
public class BootstrappingService {
	static Logger logger = Logger.getLogger(BootstrappingService.class.getName());
	private BootstrappingSeedService bootstrappingSeedService;
	private BootstrappingPatternService bootstrappingPatternService;
	private BootstrappingSentenceService bootstrappingSentenceService;

	/**
	 * Runs one bootstrapping iteration: pattern generation followed by seed generation.
	 * Any exception raised by either phase is logged and not propagated to the caller.
	 *
	 * @param sentenceInfos  sentences to scan
	 * @param prestart       maximum number of characters of left context kept before a seed
	 * @param behstart       maximum number of characters of right context kept after a seed
	 * @param frequencyLimit forwarded to the seed and pattern services when they select candidates
	 */
	public void bootstrappingMain(List<SentenceBean> sentenceInfos, Integer prestart, Integer behstart, int frequencyLimit) {
		try {
			generatePatterns(sentenceInfos, prestart, behstart, frequencyLimit);
			generateSeeds(sentenceInfos, frequencyLimit);
		} catch (Exception e) {
			// Kept non-propagating like the original, but routed through the logger
			// so the failure lands in the same place as the rest of the output.
			logger.log(Level.SEVERE, "bootstrapping iteration failed", e);
		}
	}

	/**
	 * Delegates creation of the initial seeds to the seed service.
	 *
	 * @param frequencyLimit forwarded unchanged to the seed service
	 */
	public void generateInitSeed(int frequencyLimit) {
		bootstrappingSeedService.doGenerateInitSeed(frequencyLimit);
	}

	/**
	 * Delegates to the sentence service.
	 *
	 * @return whatever the sentence service reports as valid sentences for bootstrapping
	 */
	public List<SentenceBean> getUsefulSentence() {
		return bootstrappingSentenceService.doFindBootstrappingValidSentence();
	}

	/**
	 * Pattern phase: for every seed returned by the seed service, records the left/right context
	 * of each occurrence of that seed in every sentence, then flags the seed as matched.
	 * Seeds with null or empty content are skipped and are not flagged.
	 *
	 * @author YUHU YUAN (created 2017-02-25)
	 */
	private void generatePatterns(List<SentenceBean> sentenceInfos, Integer prestart, Integer behstart, int frequencyLimit) {
		logger.info("现在开始生成模式");
		List<Seed2BSBean> seedList = bootstrappingSeedService.doFindBootstrappingValidSeed(frequencyLimit);
		if (seedList == null) {
			return;
		}
		logger.info("这次迭代有多少的种子：" + seedList.size());
		for (Seed2BSBean seed : seedList) {
			String seedContent = seed.getSeed_content();
			if (seedContent == null || seedContent.isEmpty()) {
				continue;
			}
			logger.info("生成pattern：目前的种子的内容为：" + seedContent);
			for (SentenceBean sentenceBean : sentenceInfos) {
				collectPatternsFromSentence(sentenceBean, seedContent, prestart, behstart);
			}
			logger.info("生成pattern后,更改种子的match=============================");
			seed.setMatched(1);
			bootstrappingSeedService.doUpdateBootStrappingSeedMatch(seed);
		}
	}

	/**
	 * Stores one pattern per occurrence of {@code seedContent} in the sentence.
	 *
	 * <p>After each occurrence the scan continues on the text that follows it, so the left
	 * context of a later occurrence never reaches back across the previous occurrence.
	 * {@code offset} converts positions in that remaining text back to positions in the
	 * full sentence.
	 */
	private void collectPatternsFromSentence(SentenceBean sentenceBean, String seedContent, Integer prestart,
			Integer behstart) {
		String remainder = sentenceBean.getSentence();
		// Per-sentence dump is diagnostic only; it runs once per seed/sentence pair.
		logger.fine(remainder);
		if (remainder == null || remainder.isEmpty()) {
			return;
		}
		int offset = 0;
		int seedStart = remainder.indexOf(seedContent);
		while (seedStart >= 0) {
			int seedEnd = seedStart + seedContent.length();
			// Clamp both context windows to the bounds of the remaining text.
			int leftStart = Math.max(0, seedStart - prestart);
			int rightEnd = Math.min(remainder.length(), seedEnd + behstart);

			Pattern2BSBean patternEntity = new Pattern2BSBean();
			patternEntity.setLeft_locate(String.valueOf(offset + leftStart));
			patternEntity.setLeft_pat(remainder.substring(leftStart, seedStart));
			patternEntity.setRight_pat(remainder.substring(seedEnd, rightEnd));
			patternEntity.setRight_locate(String.valueOf(offset + seedEnd));
			patternEntity.setFreqcount(1);
			patternEntity.setMatched(0);
			patternEntity.setRefcount(1);
			patternEntity.setSentence_id(sentenceBean.getId() + "");
			bootstrappingPatternService.doUpdateBootStrappingPatt(patternEntity);

			// Move past this occurrence and look for the next one in what is left.
			offset += seedEnd;
			remainder = remainder.substring(seedEnd);
			seedStart = remainder.indexOf(seedContent);
		}
	}

	/**
	 * Seed phase: for every pattern returned by the pattern service, extracts the text found
	 * between its left and right context in each sentence, then flags the pattern as matched.
	 * Patterns whose left or right context is null or shorter than two characters are skipped
	 * and are not flagged.
	 *
	 * @author YUHU YUAN (created 2017-02-25)
	 */
	private void generateSeeds(List<SentenceBean> sentenceInfos, int frequencylimit) {
		logger.info("现在开始生成种子====================");
		List<Pattern2BSBean> pattList = bootstrappingPatternService.doFindBootstrappingValidPattern(frequencylimit);
		if (pattList == null) {
			return;
		}
		logger.info("这次迭代参与的模式数量 ：" + pattList.size());
		for (Pattern2BSBean patternRuleInfo : pattList) {
			String left_pat = patternRuleInfo.getLeft_pat();
			String right_pat = patternRuleInfo.getRight_pat();
			if (left_pat == null || right_pat == null || left_pat.length() < 2 || right_pat.length() < 2) {
				continue;
			}
			// The contexts are literal sentence fragments, so they must be quoted: an unescaped
			// metacharacter would either fail to compile or match something else entirely.
			Pattern pPatt = Pattern.compile(Pattern.quote(left_pat) + "(.*?)" + Pattern.quote(right_pat));
			for (SentenceBean sentenceBean : sentenceInfos) {
				extractSeedInfo(sentenceBean, pPatt, patternRuleInfo);
			}
			logger.info("生成种子后，修改模式的match==================");
			patternRuleInfo.setMatched(1);
			bootstrappingPatternService.doUpdateBootStrappingPatternMatch(patternRuleInfo);
		}
	}

	/**
	 * Stores one candidate seed for every match of {@code pPatt} in the sentence.
	 * Sentences that are null or empty are skipped, as are matches whose captured text is empty.
	 *
	 * @param sentenceBean    sentence to scan
	 * @param pPatt           compiled "left (.*?) right" pattern for {@code patternRuleInfo}
	 * @param patternRuleInfo the pattern that produced {@code pPatt}
	 * @author YUHU YUAN (created 2017-02-26)
	 */
	private void extractSeedInfo(SentenceBean sentenceBean, Pattern pPatt, Pattern2BSBean patternRuleInfo) {
		String sentence = sentenceBean.getSentence();
		if (sentence == null || sentence.isEmpty()) {
			return;
		}
		String leftPat = patternRuleInfo.getLeft_pat();
		String rightPat = patternRuleInfo.getRight_pat();
		Matcher matcher = pPatt.matcher(sentence);
		while (matcher.find()) {
			String word = matcher.group(1);
			if (word.contains(leftPat)) {
				word = candidateWordHanded(word, pPatt, leftPat, rightPat);
			}
			if (word == null || word.isEmpty()) {
				continue;
			}
			// The trimmed word is always the tail of the captured group, so its start can be
			// derived from this match instead of searching the sentence from the beginning
			// (which would report the first occurrence for every repeated match).
			int seedStart = matcher.end(1) - word.length();

			Seed2BSBean seed2bsBean = new Seed2BSBean();
			seed2bsBean.setRefcount(1);
			seed2bsBean.setFreqcount(1);
			seed2bsBean.setMatched(0);
			seed2bsBean.setSeed_start(String.valueOf(seedStart));
			seed2bsBean.setPattern_id(patternRuleInfo.getPattern_id() + "");
			seed2bsBean.setSeed_content(word);
			seed2bsBean.setSeed_class("模式生成的种子");
			seed2bsBean.setSentence_id(sentenceBean.getId() + "");
			seed2bsBean.setIs_check(0);
			bootstrappingSeedService.doUpdateBootStrappingSeed(seed2bsBean);
		}
	}

	/**
	 * Shrinks a captured candidate that itself still contains the left context, by re-applying
	 * the pattern to {@code candidateWord + right_pat} until the capture no longer contains
	 * {@code left_pat}.
	 *
	 * <p>Example from the original author: with left_pat = "伴有" and right_pat = "活动", a
	 * sentence such as "伴有 消瘦S同 伴有 时 伴有 口干活动后胸闷" first captures everything after the
	 * first "伴有"; this method narrows that down to the text after the last "伴有".
	 *
	 * @param candidateWord captured text that contains {@code left_pat}
	 * @param pPatt         compiled "left (.*?) right" pattern
	 * @param left_pat      left context of the pattern
	 * @param right_pat     right context of the pattern
	 * @return the narrowed capture, or an empty string when the pattern does not match
	 * @author YUHU YUAN (created 2017-02-26)
	 */
	private String candidateWordHanded(String candidateWord, Pattern pPatt, String left_pat, String right_pat) {
		Matcher matcher = pPatt.matcher(candidateWord + right_pat);
		if (!matcher.find()) {
			return "";
		}
		String word = matcher.group(1);
		if (word.contains(left_pat)) {
			return candidateWordHanded(word, pPatt, left_pat, right_pat);
		}
		return word;
	}

	public BootstrappingSeedService getBootstrappingSeedService() {
		return bootstrappingSeedService;
	}

	public void setBootstrappingSeedService(BootstrappingSeedService bootstrappingSeedService) {
		this.bootstrappingSeedService = bootstrappingSeedService;
	}

	public BootstrappingPatternService getBootstrappingPatternService() {
		return bootstrappingPatternService;
	}

	public void setBootstrappingPatternService(BootstrappingPatternService bootstrappingPatternService) {
		this.bootstrappingPatternService = bootstrappingPatternService;
	}

	public BootstrappingSentenceService getBootstrappingSentenceService() {
		return bootstrappingSentenceService;
	}

	public void setBootstrappingSentenceService(BootstrappingSentenceService bootstrappingSentenceService) {
		this.bootstrappingSentenceService = bootstrappingSentenceService;
	}
}
